import numpy as np
import pandas as pd
from num2words import num2words
# sklearn
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.metrics import mean_squared_error, mean_absolute_error
# Pytorch
import torch
from torch.autograd import Variable
import torchvision.transforms as transforms
# Visualisation libraries
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex, clear_output
## progressbar
import progressbar
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
## seaborn
import seaborn as sns
## matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon
from matplotlib.font_manager import FontProperties
import matplotlib.colors as mcolors
from matplotlib import cm
# Global matplotlib styling applied to every figure in this notebook.
plt.style.use('seaborn-whitegrid')
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
# Render matplotlib figures inline (IPython magic; notebook-only syntax).
%matplotlib inline
import warnings
# Silence all library warnings to keep the notebook output clean.
warnings.filterwarnings("ignore")
Artificial neural networks (ANN), also known as connectionist systems, are computing systems inspired by animal brain neural networks. ANNs are composed of artificial neurons, linked to one another by connections. A given neuron can have multiple input and output connections.
Artificial neural network components:
There is also an element of (machine) learning. Roughly speaking, learning consists of two parts: forward propagation and backpropagation. Together with a cost function, the algorithm runs iteratively with the objective of decreasing the loss and increasing the accuracy.
In this article, we develop a Pytorch Artificial Neural Network model. The number of layers can be adjusted, of course. There are a large number of resources on choosing the right number of hidden layers such as [1]. Generally speaking, we do not recommend adding too many hidden layers. For a large number of problems, one hidden layer is sufficient.
![]()
A random n-class classification dataset can be generated using sklearn.datasets.make_classification. Here, we generate a dataset with two features and 1000 instances. The dataset is generated for classification with two classes (the code below sets n_classes = 2).
# ---- Synthetic dataset -------------------------------------------------
# 1000 samples, 2 informative features, 2 classes, one cluster per class.
n_features =2
n_classes = 2
X, y = make_classification(n_samples = int(1e3), n_features = n_features, n_redundant=0, n_classes = n_classes,
                           n_informative=2, random_state=1, n_clusters_per_class=1)
# Map numeric class labels to words (0 -> 'Zero', 1 -> 'One') for legends/tables.
Labels_dict = dict(zip(list(np.unique(y)), [num2words(x).title() for x in np.unique(y)]))
# Wrap the features and target in a single DataFrame for display.
Data = pd.DataFrame(data = X, columns = ['Feature %i' % (i+1) for i in range(n_features)])
Target = 'Outcome Variable'
Data[Target] = y
display(Data)
def Plot_Data(X, y, ax = None):
    """Scatter-plot a two-feature dataset coloured by class label.

    Parameters
    ----------
    X  : array of shape (n_samples, 2) -- feature matrix.
    y  : array of shape (n_samples,) -- integer class labels.
    ax : matplotlib Axes or None -- target axes; a new square figure is
         created when None.

    Relies on the module-level ``PD`` dict for styling (marker size,
    colours, padding, legend labels).
    """
    # Square limits: global min/max of X rounded to one significant
    # figure, padded by PD['pad'] on each side.
    lims = float('%1.e' % X.min())-PD['pad'], float('%1.e' % X.max())+PD['pad']
    # FIX: 'is None' (identity) replaces '== None' -- the idiomatic None
    # test, immune to objects with overloaded __eq__ (e.g. numpy arrays).
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(PD['FigSize'], PD['FigSize']))
    scatter = ax.scatter(X[:,0], X[:,1], s=PD['cricle_size'],
                         c=y, edgecolor = 'Navy', alpha = PD['alpha'], cmap = PD['ColorMap'])
    _ = ax.legend(handles=scatter.legend_elements()[0], labels= PD['Labels'],
                  fancybox=True, framealpha=1, shadow=True, borderpad=PD['BP'], loc='best', fontsize = 14)
    _ = ax.set_xlim(lims)
    _ = ax.set_ylim(lims)
    _ = ax.set_xlabel('Feature 1')
    _ = ax.set_ylabel('Feature 2')
    _ = ax.set_aspect(1)
    _ = ax.grid(PD['grid'])
# Styling parameters consumed by Plot_Data. NOTE: 'cricle_size' is a
# misspelling of 'circle_size', kept because the plotting functions look
# up exactly this key.
PD = dict(BP = .5, alpha=.7, bg_alpha = 0.25, grid = True, cricle_size = 50,
          FigSize = 7, h=0.02, pad=1, ColorMap = 'Set1', Labels = list(Labels_dict.values()))
Plot_Data(X, y, ax = None)
| Feature 1 | Feature 2 | Outcome Variable | |
|---|---|---|---|
| 0 | 1.536830 | -1.398694 | 1 |
| 1 | 1.369176 | -0.637344 | 1 |
| 2 | 0.502318 | -0.459105 | 1 |
| 3 | 1.833193 | -1.298082 | 1 |
| 4 | 1.042356 | 1.121529 | 0 |
| ... | ... | ... | ... |
| 995 | 0.535224 | 0.435245 | 1 |
| 996 | 1.069692 | -0.129909 | 1 |
| 997 | 1.820267 | -2.957167 | 1 |
| 998 | 1.004999 | 0.936290 | 0 |
| 999 | 1.462110 | 1.144978 | 0 |
1000 rows × 3 columns
def DatasetTargetDist(Inp, Target, Labels_dict, PD):
    """Show the class distribution of ``Inp[Target]`` as a table plus donut chart.

    Inp         : pandas DataFrame holding the target column.
    Target      : str -- target column name (also used as the figure title).
    Labels_dict : dict mapping numeric class labels to display names.
    PD          : dict of plotly styling options (colours, widths, title position).
    """
    # Table: per-class counts, numeric labels replaced by their word names.
    Table = Inp[Target].value_counts().to_frame('Count').reset_index(drop = False).rename(columns = {'index':Target})
    Table[Target] = Table[Target].replace(Labels_dict)
    Table['Percentage'] = np.round(100*(Table['Count']/Table['Count'].sum()),2)
    # One row, two panels: table on the left, pie on the right.
    fig = make_subplots(rows=1, cols=2, horizontal_spacing = 0.02, column_widths=PD['column_widths'],
                        specs=[[{"type": "table"},{"type": "pie"}]])
    # Right
    fig.add_trace(go.Pie(labels=Table[Target].values, values=Table['Count'].values,
                         pull=PD['pull'], textfont=dict(size= PD['textfont']),
                         marker=dict(colors = PD['PieColors'], line=dict(color='black', width=1))), row=1, col=2)
    fig.update_traces(hole=PD['hole'])  # hole > 0 turns the pie into a donut
    fig.update_layout(height = PD['height'], legend=dict(orientation="v"), legend_title_text= PD['legend_title'])
    # Left
    T = Table.copy()
    # Format percentages as text for the table cells ('%%' prints a literal '%').
    T['Percentage'] = T['Percentage'].map(lambda x: '%%%.2f' % x)
    # go.Table expects one array per column.
    Temp = []
    for i in T.columns:
        Temp.append(T.loc[:,i].values)
    fig.add_trace(go.Table(header=dict(values = list(Table.columns), line_color='darkslategray',
                                       fill_color= PD['TableColors'][0], align=['center','center'],
                                       font=dict(color='white', size=12), height=25), columnwidth = PD['tablecolumnwidth'],
                           cells=dict(values=Temp, line_color='darkslategray',
                                      fill=dict(color= [PD['TableColors'][1], PD['TableColors'][1]]),
                                      align=['center', 'center'], font_size=12, height=20)), 1, 1)
    fig.update_layout(title={'text': '<b>' + Target + '<b>', 'x':PD['title_x'],
                             'y':PD['title_y'], 'xanchor': 'center', 'yanchor': 'top'})
    fig.show()
# Plotly styling options for DatasetTargetDist.
PD = dict(PieColors = ['OrangeRed', 'RoyalBlue'], TableColors = ['Navy','White'], hole = .4,
          column_widths=[0.6, 0.4],textfont = 14, height = 350, tablecolumnwidth = [0.32, 0.15, 0.15],
          pull = [0.05], legend_title = Target, title_x = 0.5, title_y = 0.8)
DatasetTargetDist(Data, Target, Labels_dict, PD)
A multi-layer perceptron (MLP) is a class of feedforward artificial neural network (ANN). At each iteration, the algorithm uses the Cross-Entropy Loss to measure the loss, and then the gradient and the model update are calculated. At the end of this iterative process, we reach a better level of agreement between the true and predicted labels, since the error is lower than that of the first step.
One of the efficient methods of splitting a dataset into random train and test subsets is using sklearn.model_selection.train_test_split.
# Hold out 30% of the data for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Summarise the resulting array shapes as a one-row table.
pd.DataFrame(data={'Set':['X_train','X_test','y_train','y_test'],
                   'Shape':[X_train.shape, X_test.shape, y_train.shape, y_test.shape]}).set_index('Set').T
| Set | X_train | X_test | y_train | y_test |
|---|---|---|---|---|
| Shape | (700, 2) | (300, 2) | (700,) | (300,) |
def Header(Text, L = 100, C = 'Blue', T = 'White'):
    """Print ``Text`` as a coloured banner padded with '=' to a total width of L.

    Text : str -- banner text.
    L    : int -- total line width (banner + rule).
    C    : str -- background colour of the banner / colour of the rule.
    T    : str -- foreground (text) colour of the banner.
    """
    # FIX: 'White' added to BACK -- previously Header(..., C='White') raised
    # KeyError even though 'White' is a valid (and the default) value for T.
    BACK = {'Black': Back.BLACK, 'Red':Back.RED, 'Green':Back.GREEN, 'Yellow': Back.YELLOW, 'Blue': Back.BLUE,
            'Magenta':Back.MAGENTA, 'Cyan': Back.CYAN, 'White': Back.WHITE}
    FORE = {'Black': Fore.BLACK, 'Red':Fore.RED, 'Green':Fore.GREEN, 'Yellow':Fore.YELLOW, 'Blue':Fore.BLUE,
            'Magenta':Fore.MAGENTA, 'Cyan':Fore.CYAN, 'White': Fore.WHITE}
    # Banner, one space, then '=' padding so the whole line is L characters wide.
    print(BACK[C] + FORE[T] + Style.NORMAL + Text + Style.RESET_ALL + ' ' + FORE[C] +
          Style.NORMAL + (L- len(Text) - 1)*'=' + Style.RESET_ALL)
def Line(L=100, C = 'Blue'):
    """Print a horizontal rule of ``L`` '=' characters in colour ``C``."""
    palette = {'Black': Fore.BLACK, 'Red': Fore.RED, 'Green': Fore.GREEN,
               'Yellow': Fore.YELLOW, 'Blue': Fore.BLUE, 'Magenta': Fore.MAGENTA,
               'Cyan': Fore.CYAN, 'White': Fore.WHITE}
    rule = L * '='
    print(palette[C] + Style.NORMAL + rule + Style.RESET_ALL)
def Search_List(Key, List):
    """Return the elements of ``List`` that contain the substring ``Key``."""
    return [item for item in List if Key in item]
def TorchSets(Set):
    """Convert a numpy array or pandas object into a torch ``Variable``.

    1-D inputs (label vectors) are cast to LongTensor, as expected by
    CrossEntropyLoss; higher-dimensional inputs keep their numpy dtype.
    The tensor is moved to the GPU when CUDA is available.
    """
    # Pandas objects are reduced to their underlying numpy array first.
    if isinstance(Set, (pd.DataFrame, pd.Series)):
        Set = Set.values
    tensor = torch.from_numpy(Set)
    # 1-D arrays are class-label vectors -> integer (Long) tensor.
    if Set.ndim == 1:
        tensor = tensor.type(torch.LongTensor)
    # Move to the GPU when one is available.
    if torch.cuda.is_available():
        tensor = tensor.cuda()
    return Variable(tensor)
# Tensors
X_train_tensor = TorchSets(X_train)
y_train_tensor = TorchSets(y_train)
X_test_tensor = TorchSets(X_test)
y_test_tensor = TorchSets(y_test)
Batch_size = 100
iteration_number = int(3e4)
# Epochs chosen so that (batches per epoch) * epochs ~= iteration_number.
epochs_number = int(iteration_number / (len(X_train) / Batch_size))
# Pytorch train and test sets
Train_set = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
Test_set = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)
# data loader
train_loader = torch.utils.data.DataLoader(Train_set, batch_size = Batch_size, shuffle = False)
# BUG FIX: the test loader previously wrapped Train_set, so "test" accuracy
# during training was actually measured on the training data. It must
# iterate the held-out Test_set.
test_loader = torch.utils.data.DataLoader(Test_set, batch_size = Batch_size, shuffle = False)
class MLP_Model(torch.nn.Module):
    """Feed-forward classifier with two hidden ReLU layers and a sigmoid output.

    Architecture: input_Size -> hidden_Size -> hidden_Size // 4 -> output_Size.
    Hidden layers use He (kaiming) uniform initialisation for ReLU.

    NOTE(review): a Sigmoid on the output is unusual when the model is
    trained with CrossEntropyLoss, which expects raw logits -- kept as-is
    to preserve the original behaviour.
    """
    def __init__(self, input_Size, hidden_Size, output_Size):
        super(MLP_Model, self).__init__()
        # Attribute names (fc1..act3) match the original so state_dicts
        # saved from either version stay interchangeable.
        # Input layer -> first hidden layer.
        self.fc1 = torch.nn.Linear(input_Size, hidden_Size)
        torch.nn.init.kaiming_uniform_(self.fc1.weight, nonlinearity='relu')
        self.act1 = torch.nn.ReLU()
        # First hidden layer -> second hidden layer (a quarter of the width).
        self.fc2 = torch.nn.Linear(hidden_Size, int(hidden_Size/4))
        torch.nn.init.kaiming_uniform_(self.fc2.weight, nonlinearity='relu')
        self.act2 = torch.nn.ReLU()
        # Second hidden layer -> output layer.
        self.fc3 = torch.nn.Linear(int(hidden_Size/4), output_Size)
        torch.nn.init.kaiming_uniform_(self.fc3.weight)
        self.act3 = torch.nn.Sigmoid()

    def forward(self, x):
        """Forward pass; returns sigmoid activations of shape (batch, output_Size)."""
        hidden = self.act1(self.fc1(x))
        hidden = self.act2(self.fc2(hidden))
        return self.act3(self.fc3(hidden))
def Plot_history(history, Table_Rows = 25, yLim = 2):
    """Plot the training history: curves on the left, a sampled table on the right.

    history    : DataFrame with columns 'Iteration', 'Loss' and 'Accuracy'.
    Table_Rows : int -- number of evenly-spaced history rows shown in the table.
    yLim       : upper limit of the curves' y-axis.
    """
    fig = make_subplots(rows=1, cols=2, horizontal_spacing = 0.02, column_widths=[0.6, 0.4],
                        specs=[[{"type": "scatter"},{"type": "table"}]])
    # Left: loss and accuracy versus iteration number.
    fig.add_trace(go.Scatter(x= history['Iteration'].values, y= history['Loss'].astype(float).values.round(4),
                             line=dict(color='OrangeRed', width= 1.5), name = 'Loss'), 1, 1)
    fig.add_trace(go.Scatter(x= history['Iteration'].values, y= history['Accuracy'].astype(float).values,
                             line=dict(color='MidnightBlue', width= 1.5), name = 'Accuracy'), 1, 1)
    fig.update_layout(legend=dict(x=0, y=1.1, traceorder='reversed', font_size=12),
                      dragmode='select', plot_bgcolor= 'white', height=600, hovermode='closest',
                      legend_orientation='h')
    fig.update_xaxes(range=[history.Iteration.min(), history.Iteration.max()],
                     showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
    fig.update_yaxes(range=[0, yLim], showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
    # Right: Table_Rows evenly-spaced snapshots of the history.
    ind = np.linspace(0, history.index[-1], Table_Rows, endpoint = True).round(0).astype(int)
    # BUG FIX: .copy() added -- the original assigned into a slice (view) of
    # `history`, relying on pandas chained-assignment behaviour (hidden by the
    # global warnings filter) and risking mutation of the caller's DataFrame.
    T = history[history.index.isin(ind)].copy()
    T[['Loss','Accuracy']] = T[['Loss','Accuracy']].applymap(lambda x: '%.4e' % x)
    # go.Table expects one array per column.
    Temp = []
    for i in T.columns:
        Temp.append(T.loc[:,i].values)
    fig.add_trace(go.Table(header=dict(values = list(history.columns), line_color='darkslategray',
                                       fill_color='Navy', align=['center','center'],
                                       font=dict(color='white', size=12), height=25), columnwidth = [0.4, 0.4, 0.4],
                           cells=dict(values=Temp, line_color='darkslategray',
                                      fill=dict(color=['Lavender', 'white', 'white']),
                                      align=['center', 'center'], font_size=12,height=20)), 1, 2)
    fig.show()
Fitting the model
# Network dimensions: 2 input features, 256 hidden units, one output per class.
input_Size, output_Size = n_features, len(Labels_dict)
hidden_Size = 256
# model
model = MLP_Model(input_Size, hidden_Size, output_Size)
# GPU
if torch.cuda.is_available():
    model.cuda()
# Cross Entropy Loss
criterion= torch.nn.CrossEntropyLoss()
# Optimizer: plain SGD with momentum.
optimizer = torch.optim.SGD(model.parameters(), lr= 1e-2, momentum=.9)
# Training the Model
Count = 0
Loss_list = []
Iteration_list = []
Accuracy_list = []
# NOTE(review): MSE_list and MAE_list are never filled or read afterwards.
MSE_list = []
MAE_list = []
# Evaluate accuracy every `Steps` parameter updates.
Steps = 10
Progress_Bar = progressbar.ProgressBar(maxval= iteration_number + 200,
                                       widgets=[progressbar.Bar('=', '|', '|'),
                                                progressbar.Percentage()])
# print('---------------------------------------------------------')
# Main training loop: one parameter update per mini-batch.
for epoch in range(epochs_number):
    for i, (Xtr, ytr) in enumerate(train_loader):
        # Variables
        Xtr = Variable(Xtr.view(-1, n_features))
        ytr = Variable(ytr)
        # Set all gradients to zero
        optimizer.zero_grad()
        # Forward
        Out = model(Xtr.float())
        # loss (CrossEntropyLoss needs integer class targets)
        loss = criterion(Out, ytr.long())
        # Backward (Calculating the gradients)
        loss.backward()
        # Update parameters
        optimizer.step()
        Count += 1
        del Xtr, ytr
        # Every `Steps` updates, measure accuracy over the evaluation loader.
        if Count % Steps == 0:
            # Calculate Accuracy
            Correct, Total = 0, 0
            # NOTE(review): evaluation runs with the model still in train mode
            # and without torch.no_grad(), so gradients are tracked needlessly.
            for Xts, yts in test_loader:
                Xts = Variable(Xts.view(-1, n_features))
                # Forward
                Out = model(Xts.float())
                # Predicted class = index of the maximum output value.
                Predicted = torch.max(Out.data, 1)[1]
                # Total number of yts
                Total += len(yts)
                # Total Correct predictions
                Correct += (Predicted == yts).sum()
                del Xts, yts
            # storing loss and iteration (values are torch tensors)
            Loss_list.append(loss.data)
            Iteration_list.append(Count)
            Accuracy_list.append(Correct / float(Total))
        Progress_Bar.update(Count)
Progress_Bar.finish()
# Collect the history; list entries are tensors, so move them to CPU numpy first.
history = pd.DataFrame({'Iteration': np.array(Iteration_list),
                        'Loss': np.array([x.cpu().data.numpy() for x in Loss_list]),
                        'Accuracy': np.array([x.cpu().data.numpy() for x in Accuracy_list])})
del Loss_list, Iteration_list, Accuracy_list
|=========================================================================|100%
Model Performance
# Interactive loss/accuracy curves plus a sampled history table.
Plot_history(history, Table_Rows = 30, yLim = 1)
def Plot_Classification(Model, X, y, ax = None):
    """Plot the model's decision regions with the data points overlaid.

    Model : trained torch module mapping (n, 2) features to class scores.
    X     : array of shape (n_samples, 2) -- feature matrix.
    y     : array of shape (n_samples,) -- integer class labels.
    ax    : matplotlib Axes or None -- target axes; a new square figure
            is created when None.

    Uses the module-level ``PD`` dict for styling and ``TorchSets`` for
    array-to-tensor conversion.
    """
    # Square limits with a margin; both axes deliberately reuse the global
    # min/max of X so the framing matches Plot_Data's square plots.
    x_min, x_max = float('%1.e' % X.min())-PD['pad'], float('%1.e' % X.max())+PD['pad']
    y_min, y_max = float('%1.e' % X.min())-PD['pad'], float('%1.e' % X.max())+PD['pad']
    # Generating meshgrids covering the plot area at resolution PD['h'].
    xx, yy = np.meshgrid(np.arange(x_min, x_max, PD['h']), np.arange(y_min, y_max, PD['h']))
    Temp = np.c_[xx.ravel(), yy.ravel()]
    Temp = TorchSets(Temp)
    # Predict a class for every grid point (argmax over model outputs).
    Pred = Model(Temp.float())
    Pred = torch.max(Pred.data, 1)[1]
    Pred = Pred.cpu().data.numpy()
    Pred = Pred.reshape(xx.shape)
    # FIX: 'is None' (identity) replaces '== None' -- the idiomatic None test.
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(PD['FigSize'], PD['FigSize']))
    # Shaded decision regions first, then the actual samples on top.
    _ = ax.contourf(xx, yy, Pred, cmap = PD['ColorMap'], alpha=PD['bg_alpha'])
    scatter = ax.scatter(X[:,0], X[:,1], s=PD['cricle_size'],
                         c=y, edgecolor = 'Navy', alpha = PD['alpha'], cmap = PD['ColorMap'])
    _ = ax.legend(handles=scatter.legend_elements()[0], labels= PD['Labels'],
                  fancybox=True, framealpha=1, shadow=True, borderpad=PD['BP'], loc='best', fontsize = 14)
    _ = ax.set_xlim(x_min, x_max)
    _ = ax.set_ylim(y_min, y_max)
    _ = ax.set_xlabel('Feature 1')
    _ = ax.set_ylabel('Feature 2')
    _ = ax.set_aspect(1)
    _ = ax.grid(PD['grid'])
# Styling for the decision-boundary plots ('cricle_size' is a misspelling
# of 'circle_size', but it is the key the plotting code looks up).
PD = dict(BP = .5, alpha=.7, bg_alpha = 0.25, grid = False, cricle_size = 50,
          FigSize = 7, h=0.02, pad=1, ColorMap = 'Set1', Labels = list(Labels_dict.values()))
# Side-by-side decision regions for the train and test splits.
fig, ax = plt.subplots(1, 2, figsize=(16, 7))
# Train Set
Plot_Classification(model, X_train, y_train, ax = ax[0])
_ = ax[0].set_title('Train Set', fontsize = 16)
# Test Set
Plot_Classification(model, X_test, y_test, ax = ax[1])
_ = ax[1].set_title('Test Set', fontsize = 16)
The confusion matrix allows for visualization of the performance of an algorithm. Note that due to the size of data, here we don't provide a Cross-validation evaluation. In general, this type of evaluation is preferred.
def Confusion_Mat(CM_Train, CM_Test, PD, n_splits = 10):
    """Draw raw and row-normalised confusion-matrix heatmaps for train and test sets.

    CM_Train : square confusion matrix (counts) for the training set.
    CM_Test  : square confusion matrix (counts) for the test set.
    PD       : dict of styling options (figure size, annotation size, labels).
    n_splits : if not None, the CV fold count shown in the figure titles.
    """
    if n_splits == None:
        Titles = ['Train Set', 'Test Set']
    else:
        Titles = ['Train Set (CV = % i)' % n_splits, 'Test Set (CV = % i)' % n_splits]
    CM = [CM_Train, CM_Test]
    # Colour maps: green pair for the train figure, blue pair for the test figure.
    Cmap = ['Greens', 'YlGn','Blues', 'PuBu']
    # One figure per dataset: raw counts (left) and normalised (right).
    for i in range(2):
        fig, ax = plt.subplots(1, 2, figsize= PD['FS'])
        fig.suptitle(Titles[i], weight = 'bold', fontsize = 16)
        _ = sns.heatmap(CM[i], annot=True, annot_kws={"size": PD['annot_kws']}, cmap=Cmap[2*i], ax = ax[0],
                        linewidths = 0.2, cbar_kws={"shrink": PD['shrink']})
        _ = ax[0].set_title('Confusion Matrix');
        # Each row divided by its true-class total, rounded to 2 decimals.
        Temp = np.round(CM[i].astype('float') / CM[i].sum(axis=1)[:, np.newaxis], 2)
        _ = sns.heatmap(Temp,
                        annot=True, annot_kws={"size": PD['annot_kws']}, cmap=Cmap[2*i+1], ax = ax[1],
                        linewidths = 0.4, vmin=0, vmax=1, cbar_kws={"shrink": PD['shrink']})
        _ = ax[1].set_title('Normalized Confusion Matrix');
        # Shared axis labelling for both panels.
        for a in ax:
            _ = a.set_xlabel('Predicted labels')
            _ = a.set_ylabel('True labels');
            _ = a.xaxis.set_ticklabels(PD['Labels'])
            _ = a.yaxis.set_ticklabels(PD['Labels'])
            _ = a.set_aspect(1)
# Train
# Class predictions on the full training set (argmax over class outputs).
y_pred = model(X_train_tensor.float())
y_pred = torch.max(y_pred.data, 1)[1]
y_pred = y_pred.cpu().data.numpy()
Reports_Train = pd.DataFrame(metrics.classification_report(y_train, y_pred, target_names=list(Labels_dict.values()),
                                                           output_dict=True)).T
CM_Train = metrics.confusion_matrix(y_train, y_pred)
# Test
# Same predictions and metrics for the held-out test set.
y_pred = model(X_test_tensor.float())
y_pred = torch.max(y_pred.data, 1)[1]
y_pred = y_pred.cpu().data.numpy()
Reports_Test = pd.DataFrame(metrics.classification_report(y_test, y_pred, target_names=list(Labels_dict.values()),
                                                          output_dict=True)).T
CM_Test = metrics.confusion_matrix(y_test, y_pred)
# Pretty-print the per-class precision/recall/F1 tables with coloured styling.
Reports_Train = Reports_Train.reset_index().rename(columns ={'index': 'Train Set'})
Reports_Test = Reports_Test.reset_index().rename(columns ={'index': 'Test Set'})
display(Reports_Train.style.hide_index().set_properties(**{'background-color': 'HoneyDew', 'color': 'Black'}).\
        set_properties(subset=['Train Set'], **{'background-color': 'SeaGreen', 'color': 'White'}))
display(Reports_Test.style.hide_index().set_properties(**{'background-color': 'Azure', 'color': 'Black'}).\
        set_properties(subset=['Test Set'], **{'background-color': 'RoyalBlue', 'color': 'White'}))
# Confusion-matrix heatmaps (no cross-validation here, hence n_splits=None).
PD = dict(FS = (10, 6), annot_kws = 14, shrink = .6, Labels = list(Labels_dict.values()))
Confusion_Mat(CM_Train, CM_Test, PD = PD, n_splits = None)
| Train Set | precision | recall | f1-score | support |
|---|---|---|---|---|
| Zero | 0.942857 | 0.956522 | 0.949640 | 345.000000 |
| One | 0.957143 | 0.943662 | 0.950355 | 355.000000 |
| accuracy | 0.950000 | 0.950000 | 0.950000 | 0.950000 |
| macro avg | 0.950000 | 0.950092 | 0.949997 | 700.000000 |
| weighted avg | 0.950102 | 0.950000 | 0.950003 | 700.000000 |
| Test Set | precision | recall | f1-score | support |
|---|---|---|---|---|
| Zero | 0.947020 | 0.922581 | 0.934641 | 155.000000 |
| One | 0.919463 | 0.944828 | 0.931973 | 145.000000 |
| accuracy | 0.933333 | 0.933333 | 0.933333 | 0.933333 |
| macro avg | 0.933241 | 0.933704 | 0.933307 | 300.000000 |
| weighted avg | 0.933701 | 0.933333 | 0.933351 | 300.000000 |